import re
import time

import requests
import scrapy

from bid.items import BidItem
from bid.tools import *


_SITE = 'https://ggzyfw.beijing.gov.cn'

# One listing row: (relative href, title, text stored as the item's time).
_ROW_RE = re.compile(
    r'<li>.*?href="(.*?)" title="(.*?)".*?<p style="float: right;">(.*?)<',
    re.S,
)
# Pager text ">共N条记录 cur/total页<"; the group captures the total page count.
_PAGER_RE = re.compile(r'>共\d+条记录 \d+/(\d+)页<', re.S)

# (value of the ``e`` query parameter, label) for engineering listings.
_ENGINEERING_TRADES = (
    ('A01', '房屋建筑'),
    ('A02', '市政'),
    ('A03', '公路'),
    ('A04', '铁路'),
    ('A07', '水利'),
    ('A98', '园林绿化'),
    ('A97', '勘察设计'),
    ('A99', '其他'),
)
# (``c3`` parameter, ``channelId`` parameter, label, typ) per engineering stage.
_ENGINEERING_STAGES = (
    ('jyxxggjtbyqs', '121', '招标公告', '1'),
    ('jyxxzbhxrgs', '123', '中标候选人公示', '2'),
    # The hand-written table had typ '2' for 中标结果-房屋建筑 only, while the
    # other seven 中标结果 rows used '3'; treated as a copy-paste slip.
    ('jyxxzbgg', '124', '中标结果', '3'),
    ('jyxxgcjshtgs', '290', '合同公示', '3'),
)
# (value of the ``e`` query parameter, label) for procurement listings.
_PROCUREMENT_KINDS = (
    ('D01', '货物类'),
    ('D02', '工程类'),
    ('D03', '服务类'),
)
# (``c3`` parameter, label, typ) per procurement stage.
_PROCUREMENT_STAGES = (
    ('jyxxcggg', '采购公告', '1'),
    ('jyxxgzsx', '更正事项', '2'),
    ('jyxxzbjggg', '成交结果公告', '3'),
)


def _query_url(page_slot, c2, c3, e, channel_id):
    """Return a ``queryContent`` URL template with one ``%s`` page placeholder."""
    return (
        f'{_SITE}/cmsbj/{page_slot}.jspx?c1=jyxx&c2={c2}&c3={c3}&c4=&e={e}'
        f'&ext8=&inDates=9999&channelId={channel_id}&q='
    )


def _build_channels():
    """Return the ``[url_template, label, typ]`` rows crawled by the spider."""
    channels = [
        [f'{_SITE}/jyxxgcjszbjh/index%s.html', '工程建设-招标计划', '0'],
    ]
    for c3, channel_id, stage, typ in _ENGINEERING_STAGES:
        for code, trade in _ENGINEERING_TRADES:
            channels.append([
                _query_url('queryContent_%s', 'jyxxgcjs', c3, code, channel_id),
                f'工程建设-{stage}-{trade}',
                typ,
            ])
    channels.append(
        [f'{_SITE}/jyxxgcjsbggs/index%s.html', '工程建设-变更公示', '2'])
    channels.append(
        [f'{_SITE}/jyxxgcjsjsgs/index%s.html', '工程建设-决算公示', '2'])
    # NOTE(review): the procurement templates have no '_' before the page
    # number (queryContent%s) and all share channelId=126, unlike the
    # engineering ones. Kept exactly as before -- confirm against the site.
    for c3, stage, typ in _PROCUREMENT_STAGES:
        for code, kind in _PROCUREMENT_KINDS:
            channels.append([
                _query_url('queryContent%s', 'jyxxzfcg', c3, code, '126'),
                f'政府采购-{stage}-{kind}',
                typ,
            ])
    return channels


class BeijingSpider(scrapy.Spider):
    """Spider for the listing channels of ggzyfw.beijing.gov.cn.

    ``start_requests`` walks every channel in ``t_lis`` page by page, fetching
    the listing pages synchronously with ``requests``, and schedules one Scrapy
    request per listed entry. ``parse`` turns each detail page into an item.
    """

    name = 'beijing'
    # Must match the host of the detail links, otherwise Scrapy's offsite
    # filtering can drop every request this spider yields.
    allowed_domains = ['ggzyfw.beijing.gov.cn']
    start_urls = [_SITE]
    # Rows of [url_template, label, typ]; url_template has one %s page slot.
    t_lis = _build_channels()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36'}
    # Channels whose crawl hit the stop condition. Kept for backward
    # compatibility; class-level, so shared by all instances.
    msg = []
    # Seconds to wait for a listing page before giving up on the channel.
    request_timeout = 30
    # A channel stops at the first entry whose time text starts with this.
    stop_time_prefix = '2021'
    # Safety cap on pages fetched per channel.
    max_pages = 9998

    def start_requests(self):
        """Yield one detail-page request per new entry of every channel."""
        for channel in self.t_lis:
            yield from self._crawl_channel(channel)

    @staticmethod
    def _listing_url(url_template, page):
        """Return the listing URL of ``page`` for ``url_template``."""
        if url_template.endswith('html'):
            # Static listings: index.html for page 1, index_N.html afterwards.
            return url_template % ('' if page == 1 else f'_{page}')
        return url_template % page

    def _fetch_listing(self, url):
        """Return the listing HTML at ``url``, or ``None`` if the fetch failed."""
        try:
            res = requests.get(
                url, headers=self.headers, timeout=self.request_timeout)
            res.raise_for_status()
        except requests.RequestException as exc:
            self.logger.warning('listing fetch failed: %s (%s)', url, exc)
            return None
        res.encoding = 'utf-8'
        return res.text

    def _crawl_channel(self, channel):
        """Yield detail requests for one ``[url_template, label, typ]`` row.

        Paging stops at the last page reported by the pager, at the first entry
        rejected by ``redis_dupefilter`` or matching ``stop_time_prefix``, or
        when a listing page cannot be fetched or has no recognisable pager.
        """
        url_template, label, typ = channel[0], channel[1], channel[-1]
        for page in range(1, self.max_pages + 1):
            url = self._listing_url(url_template, page)
            self.logger.debug('%s: page %s', label, page)
            html = self._fetch_listing(url)
            if html is None:
                return

            pager = _PAGER_RE.search(html)
            if pager is None:
                # Previously an IndexError here aborted every remaining channel.
                self.logger.warning('no pager found, skipping rest: %s', url)
                return
            last_page = int(pager.group(1))
            if page > last_page:
                return

            rows = _ROW_RE.findall(html)
            for href, title, published in rows:
                item = {
                    'link': _SITE + href,
                    'title': title,
                    'time': published,
                    'classification': '北京-' + label,
                    'typ': typ,
                }
                # redis_dupefilter comes from bid.tools; presumably truthy for
                # an already-stored entry -- confirm there. The rest of the
                # channel is skipped as soon as one entry is rejected.
                if redis_dupefilter(item) or published.startswith(
                        self.stop_time_prefix):
                    self.msg.append(channel)
                    self.logger.info('%s 完成', label)
                    return
                yield scrapy.Request(
                    url=item['link'], callback=self.parse, meta={'item': item})

            if page >= last_page:
                # Nothing after this page; skip the extra request that used to
                # be made only to discover the end.
                return
            # Throttle: wait one second per row before the next listing page.
            # NOTE(review): this sleep and the requests call run inside the
            # start_requests generator, so they block the crawler meanwhile.
            time.sleep(len(rows))

    def parse(self, response):
        """Build the final item from a detail page.

        Copies the listing fields carried in ``response.meta['item']`` into a
        ``BidItem``, adds ``content`` extracted from the ``div-article2``
        element via ``get_content``, and yields the result of ``get_field``.
        """
        item = BidItem()
        item.update(response.meta['item'])
        item['content'] = get_content(response.text, '//div[@class="div-article2"]')
        yield get_field(dict(item))
